# Setup environment ----
# Packages used by this script: data.table for the table operations and
# bigKRLS for load.bigKRLS(), which reads the saved KRLS fits.
library(data.table)
library(bigKRLS)
# Project helpers. NOTE(review): presumably defines make_outcome_row(),
# make_description_row(), cv_rmse(), cv_R2(), calc_sem() and calc_p(), which
# are called below but not defined in this script -- confirm.
source("R/functions.R")
# Prefix pasted (with paste0) in front of every saved-result file name below,
# so it has to keep its trailing slash.
results_path <- "results/replication/"
# NOTE(review): presumably provides house_data and senate_data, which the
# descriptive-statistics sections read -- confirm.
load("data/data-2021-02-22.RData")

# House KRLS table (S1) ----
# Read the saved House KRLS fit and summarise it. The t-test and percentile
# summaries are converted to data.tables so that rows can be pulled by
# position when the table is written.
load.bigKRLS(paste0(results_path, "house_krls_model"),
  newname = "house_krls_model")
house_krls_sum <- summary(house_krls_model)
house_krls_sum$ttests <- as.data.table(house_krls_sum$ttests)
house_krls_sum$percentiles <- as.data.table(house_krls_sum$percentiles)
# Two-column character matrix: column 1 is the row of `ttests` to print and
# column 2 is its LaTeX label. Rows appear in printing order, so row 38
# ("Middle 90\%") is printed between rows 34 and 35. This matrix is reused
# by the dichotomous-outcome table (S9).
hou_index <- rbind(
  c(1, "Democratic Spending Advantage"),
  c(2, "$\\text{log}$(Total Spending)"),
  c(3, "Democratic Presidential Vote Advantage"),
  c(4, "Ideological Distance"),
  c(5, "Democrat Candidate's CF Score"),
  c(6, "Republican Candidate's CF Score"),
  c(7, "Dem. Inc./Low Qual. Chall."),
  c(8, "Dem. Inc./High Qual. Chall."),
  c(9, "Rep Inc./Low Qual. Chall."),
  c(10, "Rep Inc./High Qual. Chall."),
  c(11, "Open Seat/Both High Qual."),
  c(12, "Open Seat/High Qual. Dem/Low Qual. Rep"),
  c(13, "Open Seat/Low Qual. Dem/High Qual. Rep"),
  # Rows 14-33: one year indicator per even year, 1980 through 2018.
  cbind(14:33, paste0("Year = ", seq(1980, 2018, by = 2))),
  c(34, "Bottom 5\\%"),
  c(38, "Middle 90\\%"),
  c(35, "Top 5\\%"),
  c(36, "Bottom 5\\% $\\times$ Dem. Spending Adv."),
  c(37, "Top 5\\% $\\times$ Dem. Spending Adv.")
)
# Divert output to the table file. `finally` restores the console even if a
# row fails, so an error cannot leave output redirected for the session.
sink("tables/table-s1.txt")
tryCatch(
  for (i in seq_len(nrow(hou_index))) {
    make_outcome_row(house_krls_sum$ttests[as.numeric(hou_index[i, 1]), ],
      hou_index[i, 2])
  },
  finally = sink()
)

# Senate KRLS table (S2) ----
# Read the saved Senate KRLS fit and summarise it. The t-test and percentile
# summaries are converted to data.tables so that rows can be pulled by
# position when the table is written.
load.bigKRLS(paste0(results_path, "senate_krls_model"),
  newname = "senate_krls_model")
senate_krls_sum <- summary(senate_krls_model)
senate_krls_sum$ttests <- as.data.table(senate_krls_sum$ttests)
senate_krls_sum$percentiles <- as.data.table(senate_krls_sum$percentiles)
# Two-column character matrix: column 1 is the row of `ttests` to print and
# column 2 is its LaTeX label. Rows appear in printing order, so row 37
# ("Middle 90\%") is printed between rows 33 and 34. This matrix is reused
# by the dichotomous-outcome table (S10).
sen_index <- rbind(
  c(1, "Democratic Spending Advantage"),
  c(2, "$\\text{log}$(Total Spending)"),
  c(3, "Democratic Presidential Vote Advantage"),
  c(4, "Adj. Dem. Pres. Vote Advantage"),
  c(5, "$\\text{log}$($n$ Votes for Democratic Presidential Candidate)"),
  c(6, "$\\text{log}$($n$ Votes for Republican Presidential Candidate)"),
  c(7, "Ideological Distance"),
  c(8, "Democrat Candidate's CF Score"),
  c(9, "Republican Candidate's CF Score"),
  c(10, "Open Seat"),
  c(11, "Democratic Incumbent"),
  c(12, "$\\text{log}$(Voting Eligible Population)"),
  # Rows 13-32: one year indicator per even year, 1980 through 2018.
  cbind(13:32, paste0("Year = ", seq(1980, 2018, by = 2))),
  c(33, "Bottom 5\\%"),
  c(37, "Middle 90\\%"),
  c(34, "Top 5\\%"),
  c(35, "Bottom 5\\% $\\times$ Dem. Spending Adv."),
  c(36, "Top 5\\% $\\times$ Dem. Spending Adv.")
)
# Divert output to the table file. `finally` restores the console even if a
# row fails, so an error cannot leave output redirected for the session.
sink("tables/table-s2.txt")
tryCatch(
  for (i in seq_len(nrow(sen_index))) {
    make_outcome_row(senate_krls_sum$ttests[as.numeric(sen_index[i, 1]), ],
      sen_index[i, 2])
  },
  finally = sink()
)

# House descriptive stats table (S3) ----
# NOTE(review): this loads the same file as the setup section at the top of
# the script; presumably kept so this section can be run on its own --
# confirm before removing.
load("data/data-2021-02-22.RData")
# One make_description_row() result per variable, stacked into one table.
# The writer below reads the columns var, mean, sd, min, max and missing.
house_desc_stats <- house_data[, rbind(
  make_description_row(dem_vote_share, "Democratic Vote Share"),
  make_description_row(real_dem_expenditure_advantage_w_outside,
    "Democratic Expenditure Advantage (Millions of US\\$)"),
  make_description_row(log10(real_total_spending_w_outside),
    "(log) Total Expenditure (Millions of US\\$)"),
  make_description_row(dem_cfscore, "Democrat CF Score"),
  make_description_row(rep_cfscore, "Republican CF Score"),
  make_description_row(ideological_distance, "Ideological Distance"),
  make_description_row(adj_dem_presvote_advantage,
    "Adjusted Democratic Presidential Vote Advantage"),
  make_description_row(open_seat, "Open Seat"),
  make_description_row(dem_incumbent, "Democratic Incumbent"),
  make_description_row(quality_challenger, "Quality Challenger"),
  make_description_row(unopposed, "Unopposed"))]
# Write one LaTeX row per variable. `finally` restores the console even if a
# row fails. The row-wise cat() is kept on purpose: cat() formats the cells
# itself, so pasting them into one string first could change the digits.
sink("tables/table-s3.txt")
tryCatch(
  for (i in seq_len(nrow(house_desc_stats))) {
    house_desc_stats[i,
      cat(var, "&$", mean, "$&$", sd, "$&$", min, "$&$", max, "$&$", missing,
        "$\\\\\n", sep = "")]
  },
  finally = sink()
)

# Senate descriptive stats table (S4) ----
# Restricted to rows with special == 0. One make_description_row() result
# per variable, stacked into one table; the writer below reads the columns
# var, mean, sd, min, max and missing.
senate_desc_stats <- senate_data[special == 0, rbind(
  make_description_row(dem_vote_share, "Democratic Vote Share"),
  make_description_row(real_dem_expenditure_advantage_w_outside,
    "Democratic Expenditure Advantage (Millions of US\\$)"),
  make_description_row(log10(real_total_spending_w_outside),
    "(log) Total Expenditure (Millions of US\\$)"),
  make_description_row(dem_cfscore,
    "Democrat CF Score"),
  make_description_row(rep_cfscore,
    "Republican CF Score"),
  make_description_row(ideological_distance,
    "Ideological Distance"),
  make_description_row(adj_dem_presvote_advantage,
    "Adjusted Democratic Presidential Vote Advantage"),
  # Rescaled so the table reports millions of people.
  make_description_row(voting_eligible_population / 1e6,
    "Voting Eligible Population (in millions)"),
  make_description_row(open_seat,
    "Open Seat"),
  make_description_row(dem_incumbent,
    "Democratic Incumbent"),
  make_description_row(jungle,
    "Jungle Primary"),
  make_description_row(weird_race,
    "Top Two General Election/Other Candidate/Etc."),
  # A race counts as unopposed when either party's vote total is missing.
  make_description_row(is.na(dem_vote + rep_vote), "Unopposed"))]
# Write one LaTeX row per variable. `finally` restores the console even if a
# row fails. The row-wise cat() is kept on purpose: cat() formats the cells
# itself, so pasting them into one string first could change the digits.
sink("tables/table-s4.txt")
tryCatch(
  for (i in seq_len(nrow(senate_desc_stats))) {
    senate_desc_stats[i,
      cat(var, "&$", mean, "$&$", sd, "$&$", min, "$&$", max, "$&$", missing,
        "$\\\\\n", sep = "")]
  },
  finally = sink()
)

# Make CV/OOS Table (S5) ----
# Saved cross-validation results for each chamber and method.
# NOTE(review): these files presumably provide the *_cv / cv_* objects and
# the sen_folds / sen_y / hou_folds / hou_y objects used below -- confirm.
load(paste0(results_path, "senate_krls_cv.RData"))
load(paste0(results_path, "senate_svm.RData"))
load(paste0(results_path, "senate_rf.RData"))
load(paste0(results_path, "senate_lasso.RData"))
load(paste0(results_path, "house_krls_cv.RData"))
load(paste0(results_path, "house_svm.RData"))
load(paste0(results_path, "house_rf.RData"))
load(paste0(results_path, "house_lasso.RData"))
# One row per chamber x method x measure; `value` is filled in cell by cell
# below with the mean of what cv_rmse() / cv_R2() return.
oos_results <- CJ(
  chamber = c("House", "Senate"),
  method = c("KRLS", "SVM", "LASSO", "RF"),
  measure = c("RMSE", "R2"),
  value = NA_real_)
oos_results[chamber == "Senate" & method == "KRLS" & measure == "RMSE",
  value := mean(cv_rmse(senate_krls_cv, sen_folds, sen_y))]
oos_results[chamber == "Senate" & method == "KRLS" & measure == "R2",
  value := mean(cv_R2(senate_krls_cv, sen_folds, sen_y))]
oos_results[chamber == "Senate" & method == "RF" & measure == "RMSE",
  value := mean(cv_rmse(cv_rf_sen, sen_folds, sen_y))]
oos_results[chamber == "Senate" & method == "RF" & measure == "R2",
  value := mean(cv_R2(cv_rf_sen, sen_folds, sen_y))]
oos_results[chamber == "Senate" & method == "SVM" & measure == "RMSE",
  value := mean(cv_rmse(cv_svm_sen, sen_folds, sen_y))]
oos_results[chamber == "Senate" & method == "SVM" & measure == "R2",
  value := mean(cv_R2(cv_svm_sen, sen_folds, sen_y))]
oos_results[chamber == "Senate" & method == "LASSO" & measure == "RMSE",
  value := mean(cv_rmse(cv_lasso_sen, sen_folds, sen_y))]
oos_results[chamber == "Senate" & method == "LASSO" & measure == "R2",
  value := mean(cv_R2(cv_lasso_sen, sen_folds, sen_y))]
oos_results[chamber == "House" & method == "KRLS" & measure == "RMSE",
  value := mean(cv_rmse(house_krls_cv, hou_folds, hou_y))]
oos_results[chamber == "House" & method == "KRLS" & measure == "R2",
  value := mean(cv_R2(house_krls_cv, hou_folds, hou_y))]
oos_results[chamber == "House" & method == "RF" & measure == "RMSE",
  value := mean(cv_rmse(cv_rf_hou, hou_folds, hou_y))]
oos_results[chamber == "House" & method == "RF" & measure == "R2",
  value := mean(cv_R2(cv_rf_hou, hou_folds, hou_y))]
oos_results[chamber == "House" & method == "SVM" & measure == "RMSE",
  value := mean(cv_rmse(cv_svm_hou, hou_folds, hou_y))]
oos_results[chamber == "House" & method == "SVM" & measure == "R2",
  value := mean(cv_R2(cv_svm_hou, hou_folds, hou_y))]
oos_results[chamber == "House" & method == "LASSO" & measure == "RMSE",
  value := mean(cv_rmse(cv_lasso_hou, hou_folds, hou_y))]
oos_results[chamber == "House" & method == "LASSO" & measure == "R2",
  value := mean(cv_R2(cv_lasso_hou, hou_folds, hou_y))]
oos_results[, value := round(value, 3)]
# Reshape to one row per method with House and Senate columns. The argument
# is `value.var` (singular); the previous `value.vars` was swallowed by `...`
# and only worked because dcast() guesses a column named "value".
# dcast() returns the methods sorted (KRLS, LASSO, RF, SVM); c(1, 3, 4, 2)
# puts them in table order: KRLS, RF, SVM, LASSO.
oos_rmse <- dcast(
  oos_results[measure == "RMSE"], method ~ chamber, value.var = "value")[
  c(1, 3, 4, 2)]
oos_R2 <- dcast(
  oos_results[measure == "R2"], method ~ chamber, value.var = "value")[
  c(1, 3, 4, 2)]
# Write each table. `finally` restores the console even if a row fails.
# cat() is deliberately left with its default separator so the emitted
# spacing is unchanged.
sink("tables/table-s5-rmse.txt")
tryCatch(
  for (i in seq_len(nrow(oos_rmse))) {
    oos_rmse[i, cat(method, "&$", sprintf("%.03f", House), "$&$",
      sprintf("%.03f", Senate), "$\\\\\n")]
  },
  finally = sink()
)
sink("tables/table-s5-R2.txt")
tryCatch(
  for (i in seq_len(nrow(oos_R2))) {
    oos_R2[i, cat(method, "&$", sprintf("%.03f", House), "$&$",
      sprintf("%.03f", Senate), "$\\\\\n")]
  },
  finally = sink()
)

# Make average marginal effects table (S6) ----
load(paste0(results_path, "house_krls_results.RData"))
load(paste0(results_path, "senate_krls_results.RData"))
# One row per method and chamber: the mean of d_dem_spend_adv with its
# standard error and p-value from calc_sem() / calc_p(). KRLS rows keep
# observations where bottom_tail + top_tail == 0; the other methods keep
# rows flagged incl == TRUE.
# NOTE(review): the *_rf_data / *_svm_data / *_lasso_data tables are not
# created in this script; presumably they come from the .RData files loaded
# in the CV/OOS section -- confirm.
ame_robustness <- rbind(
  senate_krls_results[bottom_tail + top_tail == 0, .(
    method = "KRLS", chamber = "Senate",
    Estimate = mean(d_dem_spend_adv), SE = calc_sem(d_dem_spend_adv, .N),
    pval = calc_p(d_dem_spend_adv, .N))],
  senate_rf_data[incl == TRUE, .(
    method = "RF", chamber = "Senate",
    Estimate = mean(d_dem_spend_adv), SE = calc_sem(d_dem_spend_adv, .N),
    pval = calc_p(d_dem_spend_adv, .N))],
  senate_svm_data[incl == TRUE, .(
    method = "SVM", chamber = "Senate",
    Estimate = mean(d_dem_spend_adv), SE = calc_sem(d_dem_spend_adv, .N),
    pval = calc_p(d_dem_spend_adv, .N))],
  senate_lasso_data[incl == TRUE, .(
    method = "LASSO", chamber = "Senate",
    Estimate = mean(d_dem_spend_adv), SE = calc_sem(d_dem_spend_adv, .N),
    pval = calc_p(d_dem_spend_adv, .N))],
  house_krls_results[bottom_tail + top_tail == 0, .(
    method = "KRLS", chamber = "House",
    Estimate = mean(d_dem_spend_adv), SE = calc_sem(d_dem_spend_adv, .N),
    pval = calc_p(d_dem_spend_adv, .N))],
  house_rf_data[incl == TRUE, .(
    method = "RF", chamber = "House",
    Estimate = mean(d_dem_spend_adv), SE = calc_sem(d_dem_spend_adv, .N),
    pval = calc_p(d_dem_spend_adv, .N))],
  house_svm_data[incl == TRUE, .(
    method = "SVM", chamber = "House",
    Estimate = mean(d_dem_spend_adv), SE = calc_sem(d_dem_spend_adv, .N),
    pval = calc_p(d_dem_spend_adv, .N))],
  house_lasso_data[incl == TRUE, .(
    method = "LASSO", chamber = "House",
    Estimate = mean(d_dem_spend_adv), SE = calc_sem(d_dem_spend_adv, .N),
    pval = calc_p(d_dem_spend_adv, .N))]
)
# Round for display. SE and pval become strings so that values which round
# to zero can be shown as "<0.001".
ames_robustness_summ <- ame_robustness[, .(method, chamber,
  Estimate = round(Estimate, 3), SE = as.character(round(SE, 3)),
  pval = as.character(round(pval, 3)))]
ames_robustness_summ[SE == "0", SE := "<0.001"]
ames_robustness_summ[pval == "0", pval := "<0.001"]
# Write one file per chamber, four method rows each. `finally` restores the
# console even if a row fails. The chamber subset is taken once, outside
# the loop, instead of being recomputed for every row.
ames_house <- ames_robustness_summ[chamber == "House"]
sink("tables/table-s6-house.txt")
tryCatch(
  for (i in 1:4) {
    ames_house[i,
      cat(method, "&$", sprintf("%.03f", Estimate),
        "$&$", SE, "$&$", pval, "$\\\\\n")]
  },
  finally = sink()
)
ames_senate <- ames_robustness_summ[chamber == "Senate"]
sink("tables/table-s6-senate.txt")
tryCatch(
  for (i in 1:4) {
    ames_senate[i,
      cat(method, "&$", sprintf("%.03f", Estimate),
        "$&$", SE, "$&$", pval, "$\\\\\n")]
  },
  finally = sink()
)

# House KRLS w/ lagged outcome (S7) ----
# Read the saved lagged-outcome House fit and summarise it. The t-test and
# percentile summaries are converted to data.tables so that rows can be
# pulled by position when the table is written.
load.bigKRLS(paste0(results_path, "house_krls_model_lag"),
  newname = "house_krls_model_lag")
house_krls_sum_lag <- summary(house_krls_model_lag)
house_krls_sum_lag$ttests <- as.data.table(house_krls_sum_lag$ttests)
house_krls_sum_lag$percentiles <-
  as.data.table(house_krls_sum_lag$percentiles)
# Two-column character matrix: column 1 is the row of `ttests` to print and
# column 2 is its LaTeX label, listed in printing order.
hou_index_lag <- rbind(
  c(1, "Democratic Spending Advantage"),
  c(2, "$\\text{log}$(Total Spending)"),
  c(3, "Democratic Presidential Vote Advantage"),
  c(4, "Ideological Distance"),
  c(5, "Democrat Candidate's CF Score"),
  c(6, "Republican Candidate's CF Score"),
  c(7, "Dem. Inc./Low Qual. Chall."),
  c(8, "Dem. Inc./High Qual. Chall."),
  c(9, "Rep Inc./Low Qual. Chall."),
  c(10, "Rep Inc./High Qual. Chall."),
  # Rows 11-25: even years 1984-2018, with no rows for 1992, 2002 or 2012.
  cbind(11:25, paste0("Year = ",
    setdiff(seq(1984, 2018, by = 2), c(1992, 2002, 2012)))),
  c(26, "Bottom 5\\%"),
  c(27, "Top 5\\%"),
  c(30, "Middle 90\\%"),
  c(28, "Bottom 5\\% $\\times$ Dem. Spending Adv."),
  c(29, "Top 5\\% $\\times$ Dem. Spending Adv."),
  c(31, "Lagged Outcome")
)
# Divert output to the table file. `finally` restores the console even if a
# row fails, so an error cannot leave output redirected for the session.
sink("tables/table-s7.txt")
tryCatch(
  for (i in seq_len(nrow(hou_index_lag))) {
    make_outcome_row(
      house_krls_sum_lag$ttests[as.numeric(hou_index_lag[i, 1]), ],
      hou_index_lag[i, 2])
  },
  finally = sink()
)

# Senate KRLS w/ lagged outcome (S8) ----
# Read the saved lagged-outcome Senate fit and summarise it. The t-test and
# percentile summaries are converted to data.tables so that rows can be
# pulled by position when the table is written.
load.bigKRLS(paste0(results_path, "senate_krls_model_lag"),
  newname = "senate_krls_model_lag")
senate_krls_sum_lag <- summary(senate_krls_model_lag)
senate_krls_sum_lag$ttests <- as.data.table(senate_krls_sum_lag$ttests)
senate_krls_sum_lag$percentiles <-
  as.data.table(senate_krls_sum_lag$percentiles)
# Two-column character matrix: column 1 is the row of `ttests` to print and
# column 2 is its LaTeX label, listed in printing order.
sen_index_lag <- rbind(
  c(1, "Democratic Spending Advantage"),
  c(2, "$\\text{log}$(Total Spending)"),
  c(3, "Democratic Presidential Vote Advantage"),
  c(4, "Adjusted Democratic Presidential Vote Advantage"),
  c(5, "$\\text{log}$($n$ Votes for Democratic Presidential Candidate)"),
  c(6, "$\\text{log}$($n$ Votes for Republican Presidential Candidate)"),
  c(7, "Ideological Distance"),
  c(8, "Democrat Candidate's CF Score"),
  c(9, "Republican Candidate's CF Score"),
  c(10, "Democratic Incumbent"),
  c(11, "$\\text{log}$(Voting Eligible Population)"),
  # Rows 12-28: one year indicator per even year, 1986 through 2018.
  cbind(12:28, paste0("Year = ", seq(1986, 2018, by = 2))),
  c(29, "Bottom 5\\%"),
  c(30, "Top 5\\%"),
  c(33, "Middle 90\\%"),
  c(31, "Bottom 5\\% $\\times$ Dem. Spending Adv."),
  c(32, "Top 5\\% $\\times$ Dem. Spending Adv."),
  c(34, "Lagged Outcome")
)
# Divert output to the table file. `finally` restores the console even if a
# row fails, so an error cannot leave output redirected for the session.
sink("tables/table-s8.txt")
tryCatch(
  for (i in seq_len(nrow(sen_index_lag))) {
    make_outcome_row(senate_krls_sum_lag$ttests[
      as.numeric(sen_index_lag[i, 1]), ], sen_index_lag[i, 2])
  },
  finally = sink()
)

# House KRLS w/ dichotomous outcomes (S9) ----
load.bigKRLS(paste0(results_path, "house_krls_model_01"),
  newname = "house_krls_model_01")
# Bind the model's derivatives next to the covariates in hou_X; derivative
# columns get the covariate name prefixed with "d_".
# NOTE(review): hou_X is not created in this script; presumably it comes
# from one of the .RData files loaded earlier -- confirm.
house_krls_results_01 <- as.data.table(cbind(hou_X,
  house_krls_model_01$derivatives))
setnames(house_krls_results_01, c(colnames(hou_X),
  paste0("d_", colnames(hou_X))))
house_krls_sum_01 <- summary(house_krls_model_01)
house_krls_sum_01$ttests <- as.data.table(house_krls_sum_01$ttests)
house_krls_sum_01$percentiles <- as.data.table(house_krls_sum_01$percentiles)
# Rows and labels follow hou_index, the layout built for table S1.
# `finally` restores the console even if a row fails.
sink("tables/table-s9.txt")
tryCatch(
  for (i in seq_len(nrow(hou_index))) {
    make_outcome_row(house_krls_sum_01$ttests[
      as.numeric(hou_index[i, 1]), ], hou_index[i, 2])
  },
  finally = sink()
)

# Senate KRLS w/ dichotomous outcomes (S10) ----
load.bigKRLS(paste0(results_path, "senate_krls_model_01"),
  newname = "senate_krls_model_01")
# Bind the model's derivatives next to the covariates in sen_X; derivative
# columns get the covariate name prefixed with "d_".
# NOTE(review): sen_X is not created in this script; presumably it comes
# from one of the .RData files loaded earlier -- confirm.
senate_krls_results_01 <- as.data.table(cbind(sen_X,
  senate_krls_model_01$derivatives))
setnames(senate_krls_results_01, c(colnames(sen_X),
  paste0("d_", colnames(sen_X))))
senate_krls_sum_01 <- summary(senate_krls_model_01)
senate_krls_sum_01$ttests <- as.data.table(senate_krls_sum_01$ttests)
senate_krls_sum_01$percentiles <-
  as.data.table(senate_krls_sum_01$percentiles)
# Rows and labels follow sen_index, the layout built for table S2.
# `finally` restores the console even if a row fails.
sink("tables/table-s10.txt")
tryCatch(
  for (i in seq_len(nrow(sen_index))) {
    make_outcome_row(senate_krls_sum_01$ttests[
      as.numeric(sen_index[i, 1]), ], sen_index[i, 2])
  },
  finally = sink()
)
